{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import sys, os  \n",
    "import quippy as qp"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from ase.io import read,write\n",
    "from ase.visualize import view"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "sys.path.insert(0,'../')\n",
    "sys.path.insert(0,'../tools')\n",
    "from GlobalSimilarity import get_environmentalKernels, get_globalKernel\n",
    "from CV import CrossValidation\n",
    "from krr import KRR,dump_json,load_json,dump_data,load_data,score"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Compute a kernel and save it"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "frames = qp.AtomsList('./small_molecules.xyz',stop=100)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "soap_params = dict(cutoff=3, nmax=6, lmax=6, gaussian_width=0.4,\n",
    "                    cutoff_transition_width=0.5, centerweight=1.,nocenters=[],\n",
    "                   chem_channels=True, is_fast_average=False,\n",
    "                   islow_memory=False,nthreads=1,nchunks=1,nprocess=1)\n",
    "\n",
    "environmentalKernels = get_environmentalKernels(atoms=frames,**soap_params)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "kernel_params = dict(kernel_type='average', zeta=2, normalize_global_kernel=True)\n",
    "globalKernel = get_globalKernel(environmentalKernels,**kernel_params)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "prefix = './'\n",
    "fn = 'my_kernel_matrix'\n",
    "metadata = dict(soap_params=soap_params,fn=fn+'.npy',\n",
    "                kernel_params=kernel_params)\n",
    "dump_data(prefix+fn+'.json',metadata,globalKernel)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Train and save a KRR model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "params, Kmat = load_data('./my_kernel_matrix.json')\n",
    "\n",
    "train = range(100)\n",
    "\n",
    "Kmat_train = Kmat[np.ix_(train,train)]\n",
    "y_train = np.load('./small_molecules-dHf_peratom.npy')[train]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "model = KRR(sigma=1e-1,csi=1)\n",
    "model.train(Kmat_train,y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "state = model.pack()\n",
    "dump_json('./my_krr_model.json',state)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Predict with saved model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "params, Kmat = load_data('./my_kernel_matrix.json')\n",
    "\n",
    "train = range(100)\n",
    "test = range(50,100)\n",
    "\n",
    "Kmat_test = Kmat[np.ix_(train,test)]\n",
    "\n",
    "y_test = np.load('./small_molecules-dHf_peratom.npy')[test]\n",
    "\n",
    "model_state = load_json('./my_krr_model.json')\n",
    "model = KRR().unpack(model_state)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "y_pred = model.predict(Kmat_test)\n",
    "\n",
    "print 'MAE={:.3e} RMSE={:.3e} SUP={:.3e} R2={:.3e} CORR={:.3e}'.format(*score(y_pred,y_test))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Kfold CV"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "params, Kmat = load_data('./my_kernel_matrix.json',mmap_mode=None)\n",
    "y = np.load('./small_molecules-dHf_peratom.npy')[:100]\n",
    "params = dict(sigma=1e-1,csi=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "scoreTest, err_scoreTest = CrossValidation(Kmat,y,params,Nfold=4,seed=10)\n",
    "print 'MAE={:.3e} RMSE={:.3e} SUP={:.3e} R2={:.3e} CORR={:.3e}'.format(*scoreTest)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.14"
  },
  "toc": {
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
